document.write('') document.write('
<\/td>\n | // Learn more about F# at http://fsharp.net<\/td>\n <\/tr>\n |
<\/td>\n | // See the 'F# Tutorial' project for more help.<\/td>\n <\/tr>\n |
<\/td>\n | \n<\/td>\n <\/tr>\n |
<\/td>\n | open System.IO<\/td>\n <\/tr>\n |
<\/td>\n | open SevenZip<\/td>\n <\/tr>\n |
<\/td>\n | open System<\/td>\n <\/tr>\n |
<\/td>\n | \n<\/td>\n <\/tr>\n |
<\/td>\n | SevenZipCompressor.SetLibraryPath(@"path/to/7z.dll")<\/td>\n <\/tr>\n |
<\/td>\n | \n<\/td>\n <\/tr>\n |
<\/td>\n | let dir = @"files/path/..."<\/td>\n <\/tr>\n |
<\/td>\n | let txts = Directory.GetFiles(dir + @"Test")<\/td>\n <\/tr>\n |
<\/td>\n | \n<\/td>\n <\/tr>\n |
<\/td>\n | let sz = SevenZip.SevenZipCompressor()<\/td>\n <\/tr>\n |
<\/td>\n | let st = System.Diagnostics.Stopwatch()<\/td>\n <\/tr>\n |
<\/td>\n | \n<\/td>\n <\/tr>\n |
<\/td>\n | sz.CompressionMethod <- CompressionMethod.Ppmd<\/td>\n <\/tr>\n |
<\/td>\n | sz.CompressionLevel <- CompressionLevel.Low<\/td>\n <\/tr>\n |
<\/td>\n | \n<\/td>\n <\/tr>\n |
<\/td>\n | let compress2 (f:byte[]) = <\/td>\n <\/tr>\n |
<\/td>\n | use mio = new MemoryStream(f)<\/td>\n <\/tr>\n |
<\/td>\n | use m2 = new MemoryStream(f.Length * 2) <\/td>\n <\/tr>\n |
<\/td>\n | sz.CompressStream(mio, m2)<\/td>\n <\/tr>\n |
<\/td>\n | m2.GetBuffer().[0..int m2.Length - 1]<\/td>\n <\/tr>\n |
<\/td>\n | <\/td>\n <\/tr>\n |
<\/td>\n | st.Start()<\/td>\n <\/tr>\n |
<\/td>\n | let compressionMap = txts |> Array.map (fun f -> Path.GetFileNameWithoutExtension f, f |> File.ReadAllBytes |> compress2) |> Map.ofArray<\/td>\n <\/tr>\n |
<\/td>\n | <\/td>\n <\/tr>\n |
<\/td>\n | let compredist f1 f2 n1 n2 =<\/td>\n <\/tr>\n |
<\/td>\n | let code = compressionMap.[n1]<\/td>\n <\/tr>\n |
<\/td>\n | let code2 = compressionMap.[n2]<\/td>\n <\/tr>\n |
<\/td>\n | let fxy = Array.append f1 f2<\/td>\n <\/tr>\n |
<\/td>\n | let code3 = compress2 (fxy) <\/td>\n <\/tr>\n |
<\/td>\n | float(code3.Length - (min (code.Length) (code2.Length))) / float(max (code.Length) (code2.Length)) <\/td>\n <\/tr>\n |
<\/td>\n | <\/td>\n <\/tr>\n |
<\/td>\n | let nearEdges = [| for f in txts -> let n1 = Path.GetFileNameWithoutExtension f <\/td>\n <\/tr>\n |
<\/td>\n | n1, <\/td>\n <\/tr>\n |
<\/td>\n | let fbytes = File.ReadAllBytes f <\/td>\n <\/tr>\n |
<\/td>\n | txts |> Array.map (fun fname -> <\/td>\n <\/tr>\n |
<\/td>\n | let n2 = Path.GetFileNameWithoutExtension fname<\/td>\n <\/tr>\n |
<\/td>\n | Path.GetFileNameWithoutExtension fname, compredist fbytes (File.ReadAllBytes(fname)) n1 n2) <\/td>\n <\/tr>\n |
<\/td>\n | |> Array.sortBy snd|]<\/td>\n <\/tr>\n |
<\/td>\n | st.Stop() <\/td>\n <\/tr>\n |
<\/td>\n | \n<\/td>\n <\/tr>\n |
<\/td>\n | let nearEdgesMap = nearEdges |> Map.ofArray<\/td>\n <\/tr>\n |
<\/td>\n | let pairs = nearEdgesMap |> Map.map (fun _ v -> Map.ofArray v)<\/td>\n <\/tr>\n |
<\/td>\n | \n<\/td>\n <\/tr>\n |
<\/td>\n | printfn "%A" nearEdges <\/td>\n <\/tr>\n |
<\/td>\n | \n<\/td>\n <\/tr>\n |
<\/td>\n | type 'a Tree =<\/td>\n <\/tr>\n |
<\/td>\n | | Node of 'a<\/td>\n <\/tr>\n |
<\/td>\n | | Branch of 'a Tree * 'a Tree<\/td>\n <\/tr>\n |
<\/td>\n | \n<\/td>\n <\/tr>\n |
<\/td>\n | type Cluster<'a when 'a : comparison> = <\/td>\n <\/tr>\n |
<\/td>\n | | Singleton of 'a Set<\/td>\n <\/tr>\n |
<\/td>\n | | Clusters of 'a Set * 'a Tree<\/td>\n <\/tr>\n |
<\/td>\n | \n<\/td>\n <\/tr>\n |
<\/td>\n | let completelinkage (ps:Map<'a, Map<'a,float>>) (a: 'a Set) (b:'a Set) = <\/td>\n <\/tr>\n |
<\/td>\n | a |> Set.map (fun item1 -> b |> Set.map (fun item2 -> ps.[item1].[item2]) <\/td>\n <\/tr>\n |
<\/td>\n | |> Set.maxElement) //we only want the two largest pair distances) <\/td>\n <\/tr>\n |
<\/td>\n | |> Set.maxElement<\/td>\n <\/tr>\n |
<\/td>\n | <\/td>\n <\/tr>\n |
<\/td>\n | let distclust ps = function <\/td>\n <\/tr>\n |
<\/td>\n | | Singleton (item), Clusters(items, _) -> completelinkage ps item items<\/td>\n <\/tr>\n |
<\/td>\n | | Clusters (items, _) , Singleton(item) -> completelinkage ps items item<\/td>\n <\/tr>\n |
<\/td>\n | | Clusters (items1, _), Clusters(items2, _) -> completelinkage ps items1 items2<\/td>\n <\/tr>\n |
<\/td>\n | | Singleton (item1) , Singleton(item2) -> ps.[item1.MaximumElement].[item2.MaximumElement]<\/td>\n <\/tr>\n |
<\/td>\n | \n<\/td>\n <\/tr>\n |
<\/td>\n | let mergeClusters = function <\/td>\n <\/tr>\n |
<\/td>\n | | Singleton (item), Clusters(items, dendogram) <\/td>\n <\/tr>\n |
<\/td>\n | | Clusters (items, dendogram) , Singleton(item) -> Clusters(Set.union item items, Branch(dendogram, Node item.MinimumElement))<\/td>\n <\/tr>\n |
<\/td>\n | | Clusters (items1, dendogram1), Clusters(items2, dendogram2) -> Clusters(Set.union items1 items2, Branch(dendogram1, dendogram2))<\/td>\n <\/tr>\n |
<\/td>\n | | Singleton (item1) , Singleton(item2) -> Clusters(Set.union item1 item2, Branch(Node item1.MinimumElement, Node item2.MinimumElement))<\/td>\n <\/tr>\n |
<\/td>\n | \n<\/td>\n <\/tr>\n |
<\/td>\n | let r = Random()<\/td>\n <\/tr>\n |
<\/td>\n | \n<\/td>\n <\/tr>\n |
<\/td>\n | (* <\/td>\n <\/tr>\n |
<\/td>\n | A function that takes a cluster and a set of clusters and finds the nearest item using cluster dist functions<\/td>\n <\/tr>\n |
<\/td>\n | \n<\/td>\n <\/tr>\n |
<\/td>\n | A function that takes a cluster and an item and calculates distance as maxdist (item, clustermember)<\/td>\n <\/tr>\n |
<\/td>\n | \n<\/td>\n <\/tr>\n |
<\/td>\n | There is a map that holds every item and its neighbiors<\/td>\n <\/tr>\n |
<\/td>\n | If we have an item we find the closest item by looking it up in the map.<\/td>\n <\/tr>\n |
<\/td>\n | But we also need to find the closest in the cluster. So we must compare the item to a cluster<\/td>\n <\/tr>\n |
<\/td>\n | To do this we for each cluster, compare the distance to our current item<\/td>\n <\/tr>\n |
<\/td>\n | If an item is closest we add the merged 2 to the cluster stack as a branch and remove the item from actives<\/td>\n <\/tr>\n |
<\/td>\n | If a cluster is closest we merge the item to the tree, remove it from the cluster stack and add the new tree to the stack<\/td>\n <\/tr>\n |
<\/td>\n | \n<\/td>\n <\/tr>\n |
<\/td>\n | If the next item we are looking at is a cluster we must find the closest item.<\/td>\n <\/tr>\n |
<\/td>\n | To find it in the single set we map each item to its distance from the cluster using dist clust<\/td>\n <\/tr>\n |
<\/td>\n | We also sort the cluster set by distance from current cluster<\/td>\n <\/tr>\n |
<\/td>\n | Again if the single item is the closest we merge with cluster and remove from map;<\/td>\n <\/tr>\n |
<\/td>\n | If the cluster is the closest we remove both clusters from clusterset, merge them and put them back<\/td>\n <\/tr>\n |
<\/td>\n | Recurse <\/td>\n <\/tr>\n |
<\/td>\n | \n<\/td>\n <\/tr>\n |
<\/td>\n | Item , Item -> Pack as a Singleton<\/td>\n <\/tr>\n |
<\/td>\n | *)<\/td>\n <\/tr>\n |
<\/td>\n | // (clusterset : Map<string, string Cluster>) <\/td>\n <\/tr>\n |
<\/td>\n | \n<\/td>\n <\/tr>\n |
<\/td>\n | let asCluster x = Singleton (set [x])<\/td>\n <\/tr>\n |
<\/td>\n | \n<\/td>\n <\/tr>\n |
<\/td>\n | let closestinActives distances cluster (item : string Set) = <\/td>\n <\/tr>\n |
<\/td>\n | item |> Set.map (fun s -> distclust distances (asCluster s, cluster), s)<\/td>\n <\/tr>\n |
<\/td>\n | |> Set.minElement <\/td>\n <\/tr>\n |
<\/td>\n | \n<\/td>\n <\/tr>\n |
<\/td>\n | let find points first closest (distances : Map<string, Map<string,float>>) = <\/td>\n <\/tr>\n |
<\/td>\n | let initialActives = points |> Set.ofArray |> Set.remove first <\/td>\n <\/tr>\n |
<\/td>\n | |> Set.remove closest <\/td>\n <\/tr>\n |
<\/td>\n | \n<\/td>\n <\/tr>\n |
<\/td>\n | let rec seek (stack : string Cluster list) (actives : string Set) = <\/td>\n <\/tr>\n |
<\/td>\n | let current = stack.Head <\/td>\n <\/tr>\n |
<\/td>\n | if stack.Length = 1 && actives = Set.empty then current<\/td>\n <\/tr>\n |
<\/td>\n | else <\/td>\n <\/tr>\n |
<\/td>\n | let nextDist, next = if actives.Count = 0 then Double.MaxValue,"" else closestinActives distances current actives<\/td>\n <\/tr>\n |
<\/td>\n | if stack.Length = 1 then seek (asCluster next :: stack) (actives.Remove(next))<\/td>\n <\/tr>\n |
<\/td>\n | else let topofstack = stack.Tail.Head <\/td>\n <\/tr>\n |
<\/td>\n | let stackDist = distclust distances (topofstack, current) <\/td>\n <\/tr>\n |
<\/td>\n | <\/td>\n <\/tr>\n |
<\/td>\n | if nextDist < stackDist then<\/td>\n <\/tr>\n |
<\/td>\n | seek (asCluster next :: stack) (actives.Remove(next))<\/td>\n <\/tr>\n |
<\/td>\n | else seek ((mergeClusters (current, topofstack)) :: (stack.Tail.Tail)) actives<\/td>\n <\/tr>\n |
<\/td>\n | <\/td>\n <\/tr>\n |
<\/td>\n | seek [asCluster closest ; asCluster first] initialActives<\/td>\n <\/tr>\n |
<\/td>\n | <\/td>\n <\/tr>\n |
<\/td>\n | let rec toGraph depth = function<\/td>\n <\/tr>\n |
<\/td>\n | | Node(x) -> x, "", " node\\r\\n [\\r\\n id\\t\\""+x+"\\"\\r\\n label\\t\\"" + x + "\\"\\r\\n ]\\r\\n"<\/td>\n <\/tr>\n |
<\/td>\n | | Branch(ltree,rtree) -> let lname, lgraph, names1 = toGraph (depth + 1) ltree<\/td>\n <\/tr>\n |
<\/td>\n | let rname, rgraph, names2 = toGraph (depth + 1) rtree<\/td>\n <\/tr>\n |
<\/td>\n | let name = string (r.Next(0, int(2. ** (float depth + 9.))) )<\/td>\n <\/tr>\n |
<\/td>\n | name, sprintf "%s\\r\\n%s\\r\\n edge\\r\\n [\\r\\n source\\t\\"%s\\"\\r\\n target\\t\\"%s\\"\\r\\n ]\\r\\n edge\\r\\n [\\r\\n source\\t\\"%s\\"\\r\\n target\\t\\"%s\\"\\r\\n ]" <\/td>\n <\/tr>\n |
<\/td>\n | lgraph rgraph name lname name rname, <\/td>\n <\/tr>\n |
<\/td>\n | (sprintf " node\\r\\n [\\r\\n id\\t\\"%s\\"\\r\\n label\\t\\"\\"\\r\\n ]\\r\\n" name) + names1 + names2<\/td>\n <\/tr>\n |
<\/td>\n | \n<\/td>\n <\/tr>\n |
<\/td>\n | <\/td>\n <\/tr>\n |
<\/td>\n | let first = fst nearEdges.[r.Next(0,txts.Length)]<\/td>\n <\/tr>\n |
<\/td>\n | let closest = fst nearEdgesMap.[first].[1]<\/td>\n <\/tr>\n |
<\/td>\n | let items, fcluster = (function | Clusters(leset, letree) -> leset, letree) (find (nearEdges |> Array.map fst) first closest pairs)<\/td>\n <\/tr>\n |
<\/td>\n | \n<\/td>\n <\/tr>\n |
<\/td>\n | let _, outgraph, nodes = toGraph 0 fcluster <\/td>\n <\/tr>\n |
<\/td>\n | <\/td>\n <\/tr>\n |
<\/td>\n | let n = "graph [" + nodes + outgraph + "]"<\/td>\n <\/tr>\n |
<\/td>\n | File.WriteAllText("mbook.gml", n)<\/td>\n <\/tr>\n |
<\/td>\n | <\/td>\n <\/tr>\n<\/table>\n\n\n <\/div>\n\n <\/div>\n \n<\/div>\n\n <\/div>\n |